First we need to load our tools - including the tidyverse and also read in the data, which we can do directly in R.
library(tidyverse)
library(DT)
UFO <- read_rds("UFO_clean.rds")
US_2014 <- read_rds("UFO_2014.rds")
lower_states <- read_rds("lower_states.rds")
Now let’s make some plots and see what is possible in R.
Perhaps we are curious about how often each of the different shapes of UFOs were sighted.
ggplot(US_2014, aes(x = shape, fill = shape)) + geom_bar()+
theme(legend.position = "none" , text = element_text(size = 15), axis.text.x = element_text(angle = 45))
We can see that many of these shapes are rather similar, like other, unknown, and NA or oval. circle, disk, egg, and sphere, or even flash, light, and fireball. Let’s simplify these categories but only do it just before the plot, let’s not change our data.
US_2014 %>% mutate(shape = case_when(
shape %in% c("circle", "disk", "egg", "oval", "sphere") ~ "circular",
shape %in% c("flash", "light", "fireball") ~ "light/fire",
shape %in% c("other", "unknown","formation") |is.na(shape) ~ "unknown",
shape %in% c("cylinder", "cigar") ~ "cylindrical",
.default = shape)) %>%
ggplot(aes(x = shape, fill = shape)) + geom_bar() +
theme(legend.position = "none" , text = element_text(size = 15))
Nice, let’s also organize the columns.
library(forcats)
US_2014 %>% mutate(shape = case_when(
shape %in% c("circle", "disk", "egg", "oval", "sphere") ~ "circular",
shape %in% c("flash", "light", "fireball") ~ "light/fire",
shape %in% c("other", "unknown","formation") |is.na(shape) ~ "unknown",
shape %in% c("cylinder", "cigar") ~ "cylindrical",
.default = shape)) %>%
mutate(shape = factor(shape)) %>% group_by(shape) %>%summarise(frequency = n()) %>%
ggplot(aes(x = fct_reorder(shape, frequency), y = frequency, fill = shape)) + geom_col()+
labs(x = "Shape of UFO", y = "Mumber of sightings", title = "Number of UFO sightings by shape")+
theme(legend.position = "none" , text = element_text(size = 15))
Now we can clearly see that most sightings have some sort of circular shape or a flash of light.
Next, maybe we are wondering which states have the most sightings.
US_2014 %>% group_by(state) %>% summarise(frequency = n()) %>% arrange(desc(frequency)) %>% mutate(percent = (frequency/sum(frequency)*100)) %>%
ggplot(aes(x = as_factor(state), y = frequency))+ geom_col(fill = "blue") + theme_linedraw()+
labs(x = "Frequency of UFO Sightings", y = "State") +
theme( text = element_text(size = 15), axis.text.x = element_text(angle = 60))
Let’s say we were really interested in NM because of Roswell. We could highlight NM specifically to be a different color.
US_2014 %>% group_by(state) %>% summarise(frequency = n()) %>% arrange(desc(frequency)) %>% mutate(percent = (frequency/sum(frequency)*100)) %>%
mutate(fill_color = case_when(state =="nm" ~"nm", .default = "not_nm"))%>%
ggplot(aes(x = as_factor(state), y = frequency, fill = fill_color))+ geom_col() + theme_linedraw()+
labs(y = "Frequency of UFO Sightings", x = "State") + theme(legend.position = "none" , text = element_text(size = 15), axis.text.x = element_text(angle = 60))
We can also use R to look at when sightings typically occur.
UFO_time <-UFO %>% separate(datetime,into = c("date", "time"), sep = " ") %>% mutate(date = mdy(date)) %>% separate(time, into= c("hour", "min")) %>% mutate(hour = as.numeric(hour), min = as.numeric(min)) %>% filter(hour <=24)
UFO_time <- UFO_time %>% mutate(timespan = case_when(hour %in%c(18,19,20,21)~ "Evening",
hour >21 ~ "Night",
hour >=0 & hour <12 ~ "Morning",
hour >=12 & hour <18 ~ "Afternoon"))
summarized_data <-UFO_time%>% group_by(hour, timespan) %>% summarize(frequency = n())
ggplot() +geom_point(data =summarized_data, aes(y = frequency, x = hour))+
theme( text = element_text(size = 15))
Let’s change this so that the time values are more of what we might typically be used to. We can also add some color to indicate different timespans of the day.
# Create a data frame for the rectangles
rectangles <- data.frame(
xmin = c(0, 12, 18, 21),
xmax = c(12, 18, 21, 24),
ymin = 0,
ymax = max(summarized_data$frequency),
timespan = c("Morning", "Afternoon", "Evening", "Night") # Adjust accordingly
)
# Plot
summarized_data <-mutate(summarized_data, time_regular = case_when(
hour == 0 ~ "12 AM",
hour < 12 ~ paste0(hour, " AM"),
hour == 12 ~ "12 PM",
TRUE ~ paste0(hour - 12, " PM")
))
summarized_data <-summarized_data %>% mutate(time_regular = as.factor(time_regular))
ggplot() + geom_rect(data = rectangles, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = timespan), alpha = 0.2) + geom_point(data =summarized_data, aes(y = frequency, x = time_regular)) +
labs(x = "Hour", y = "Frequency") +
theme_linedraw() +
theme(legend.position = "bottom" , text = element_text(size = 15))
Now let’s take a look at a map of sightings in 2014, R can easily allow us to create a heatmap.
library(maps)
state_sightings <- US_2014 %>% group_by(state) %>% summarise(frequency = n())
# Get the map data for the US
us_map <- map_data("state")
state_names <- tibble(state = tolower(state.name), abrev = tolower(state.abb))
us_map <-full_join(us_map, state_names, by = c("region" = "state") )
# Merge the sightings data with the map data
us_map <- us_map %>%
full_join(state_sightings, by = c("abrev" = "state"))
# Create the heat map
ggplot(data = us_map, aes(x = long, y = lat, group = group, fill = frequency)) +
geom_polygon(color = "white") +
coord_fixed(1.3) +
theme_minimal() +
labs(title = "UFO Sightings in the US in 2014", fill = "Number of Sightings") +
scale_fill_gradient(low = "lightblue", high = "darkblue", na.value = "white") + theme_void()
How does this compare if we include all the dates in the dataset:
library(maps)
state_sightings <- UFO %>% filter(country_new=="US") %>% group_by(state) %>% summarise(frequency = n())
# Get the map data for the US
us_map <- map_data("state")
state_names <- tibble(state = tolower(state.name), abrev = tolower(state.abb))
us_map <-full_join(us_map, state_names, by = c("region" = "state") )
# Merge the sightings data with the map data
us_map <- us_map %>%
full_join(state_sightings, by = c("abrev" = "state"))
# Create the heat map
ggplot(data = us_map, aes(x = long, y = lat, group = group, fill = frequency)) +
geom_polygon(color = "white") +
coord_fixed(1.3) +
theme_minimal() +
labs(title = "UFO Sightings in the US through 2014", fill = "Number of Sightings") +
scale_fill_gradient(low = "lightblue", high = "darkblue", na.value = "white") + theme_void()
Now we will add interactive features to our plot. First we will plot with text about one of the sightings comments for each city.
library(plotly)
nm_map <- map_data("state", region = "new mexico")
md_map <- map_data("state", region = "maryland")
viz <- UFO %>%
filter(country_new == "US", state == "nm", str_detect(datetime, "2014")) %>%
group_by(city) %>%
mutate(city_frequency = n()) %>%
ungroup() %>%
ggplot(aes(as.numeric(longitude), as.numeric(latitude))) +
geom_polygon(data = nm_map, aes(x = long, y = lat), fill = NA, color = "black") + # New Mexico borders
coord_equal() +
geom_point(aes(text = comments, size = city_frequency), colour = "red", alpha = 1/2)
viz <- UFO %>%
filter(country_new == "US", state == "nm", as.numeric(longitude) < -90) %>%
group_by(city) %>%
mutate(city_frequency = n()) %>%
ungroup() %>%
ggplot(aes(as.numeric(longitude), as.numeric(latitude))) +
geom_polygon(data = nm_map, aes(x = long, y = lat), fill = NA, color = "black") + # New Mexico borders
coord_equal() +
geom_point(aes(text = c(comments), size = city_frequency), colour = "red", alpha = 1/2)
ggplotly(viz, tooltip = c("text", "size"))
Or we will add text about the frequency within the city.
viz <- UFO %>%
filter(country_new == "US", state == "md", str_detect(datetime, "2014")) %>%
group_by(city) %>%
mutate(city_frequency = n()) %>%
ungroup() %>%
ggplot(aes(as.numeric(longitude), as.numeric(latitude))) +
geom_polygon(data = md_map, aes(x = long, y = lat), fill = NA, color = "black") + # New Mexico borders
coord_equal() +
geom_point(aes(text = c(comments), size = city_frequency), colour = "red", alpha = 1/2)
ggplotly(viz, tooltip = c("text", "size"))
We can also make our plot code into a function, so that we can just plug in the state we want and get an interactive plot!
make_ufo_state_map_city<-function(state_for_map){
state_map <- map_data("state", region = state_for_map)
state_abv <- state_names %>% filter(state== state_for_map) %>% pull(abrev)
viz <- UFO %>%
filter(country_new == "US", state == state_abv, str_detect(datetime, "2014")) %>%
filter(longitude>= min(state_map$long) & latitude <=max(state_map$lat)) %>%
group_by(city) %>%
mutate(city_frequency = n()) %>%
ungroup() %>%
ggplot(aes(as.numeric(longitude), as.numeric(latitude))) +
geom_polygon(data = state_map, aes(x = long, y = lat), fill = NA, color = "black") + # state borders
coord_equal() +
geom_point(aes(text = city, size = city_frequency), colour = "red", alpha = 1/2)+
xlim(min(state_map$long), max(state_map$long))+ ylim(min(state_map$lat), max(state_map$lat)) +
theme_void() + theme(axis.line = element_blank())
ggplotly(viz, tooltip = c("text", "size"))
}
make_ufo_state_map_comments<-function(state_for_map){
state_map <- map_data("state", region = state_for_map)
state_abv <- state_names %>% filter(state== state_for_map) %>% pull(abrev)
viz <- UFO %>%
filter(country_new == "US", state == state_abv, str_detect(datetime, "2014")) %>%
filter(longitude>= min(state_map$long) & latitude <=max(state_map$lat)) %>%
group_by(city) %>%
mutate(city_frequency = n()) %>%
ungroup() %>%
ggplot(aes(as.numeric(longitude), as.numeric(latitude))) +
geom_polygon(data = state_map, aes(x = long, y = lat), fill = NA, color = "black") + # state borders
coord_equal() +
geom_point(aes(text = comments, size = city_frequency), colour = "red", alpha = 1/2)+
xlim(min(state_map$long), max(state_map$long))+ ylim(min(state_map$lat), max(state_map$lat)) +
theme_void() + theme(axis.line = element_blank())
ggplotly(viz, tooltip = c("text", "size"))
}
Now ket’s try it for Louisiana. We only need a single line of code for each plot! We could even make this into an app so people could interactively pick their state of interest.
make_ufo_state_map_city(state_for_map = "louisiana")
make_ufo_state_map_comments(state_for_map = "louisiana")